# Import Packages
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
import altair as alt
import us.states
# Load the on-time performance CSV (the 2022_12 extract) into a DataFrame
csv_path = "On_Time_Marketing_Carrier_On_Time_Performance_2022_12_final.csv"
flight = pd.read_csv(csv_path)
# Bare expression: in a notebook this renders a preview of the frame
flight
| | Year | Month | DayofMonth | DayOfWeek | FlightDate | Marketing_Airline_Network | DOT_ID_Marketing_Airline | Flight_Number_Marketing_Airline | Origin | OriginCityName | ... | DistanceGroup | CarrierDelay | WeatherDelay | NASDelay | SecurityDelay | LateAircraftDelay | FirstDepTime | TotalAddGTime | LongestAddGTime | DivAirportLandings |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | 12 | 19 | 1 | 12/19/2022 | DL | 19790 | 4628 | BDL | Hartford, CT | ... | 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 1 | 2022 | 12 | 20 | 2 | 12/20/2022 | DL | 19790 | 4628 | BDL | Hartford, CT | ... | 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 2 | 2022 | 12 | 21 | 3 | 12/21/2022 | DL | 19790 | 4628 | BDL | Hartford, CT | ... | 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 3 | 2022 | 12 | 22 | 4 | 12/22/2022 | DL | 19790 | 4628 | BDL | Hartford, CT | ... | 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 4 | 2022 | 12 | 23 | 5 | 12/23/2022 | DL | 19790 | 4628 | BDL | Hartford, CT | ... | 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 576822 | 2022 | 12 | 27 | 2 | 12/27/2022 | NK | 20416 | 913 | MCI | Kansas City, MO | ... | 6 | 22.0 | 0.0 | 1.0 | 0.0 | 14.0 | NaN | NaN | NaN | 0 |
| 576823 | 2022 | 12 | 28 | 3 | 12/28/2022 | NK | 20416 | 913 | MCI | Kansas City, MO | ... | 6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 576824 | 2022 | 12 | 29 | 4 | 12/29/2022 | NK | 20416 | 913 | MCI | Kansas City, MO | ... | 6 | 0.0 | 0.0 | 11.0 | 0.0 | 11.0 | NaN | NaN | NaN | 0 |
| 576825 | 2022 | 12 | 30 | 5 | 12/30/2022 | NK | 20416 | 913 | MCI | Kansas City, MO | ... | 6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 576826 | 2022 | 12 | 31 | 6 | 12/31/2022 | NK | 20416 | 913 | MCI | Kansas City, MO | ... | 6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
576827 rows × 50 columns
# Working alias used by the analysis cells below; this binds the same
# DataFrame object as `flight` (no copy is made).
df = flight
Create a bar chart to show the number of flights on each day of the week. This can help identify which days have the most and least flights.
import plotly.express as px
import pandas as pd
# Tally flights per weekday code and order the tally by the code itself
day_counts = df["DayOfWeek"].value_counts().sort_index()

weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
weekday_axis_labels = {"x": "Day of Week", "y": "Number of Flights"}

fig = px.bar(
    day_counts,
    x=day_counts.index,
    y=day_counts.values,
    labels=weekday_axis_labels,
)

# Swap the numeric ticks 1-7 for weekday abbreviations and title the axis
fig.update_xaxes(
    title="Days of the Week",
    tickvals=list(range(1, len(weekday_names) + 1)),
    ticktext=weekday_names,
)
fig.show()
Create a bar chart to show the number of flights per airline. This can help identify which airlines have the most flights in the dataset.
# Flights per marketing airline; value_counts() orders by descending count
airline_counts = df["Marketing_Airline_Network"].value_counts()

airline_axis_labels = {"x": "Airline", "y": "Number of Flights"}
fig = px.bar(
    airline_counts,
    x=airline_counts.index,
    y=airline_counts.values,
    labels=airline_axis_labels,
)

# Title the x-axis explicitly
fig.update_xaxes(title="Airlines")
fig.show()
Create a bar chart to visualize the average departure delay (in minutes) for each airline.
# Mean of DepDelayMinutes within each marketing airline
flights_by_airline = df.groupby("Marketing_Airline_Network")
average_delays = flights_by_airline["DepDelayMinutes"].mean()

delay_axis_labels = {"x": "Airline", "y": "Average Delay (minutes)"}
fig = px.bar(
    average_delays,
    x=average_delays.index,
    y=average_delays.values,
    labels=delay_axis_labels,
)

# Title the x-axis explicitly
fig.update_xaxes(title="Airlines")
fig.show()
Create a bar chart to show the number of flights per origin city. This can help identify which cities have the most flights in the dataset.
# Flights per origin city; value_counts() orders by descending count
city_counts = df["OriginCityName"].value_counts()

city_axis_labels = {"x": "Origin City", "y": "Number of Flights"}
fig = px.bar(
    city_counts,
    x=city_counts.index,
    y=city_counts.values,
    labels=city_axis_labels,
)

# Title the x-axis explicitly
fig.update_xaxes(title="Origin City")
fig.show()
Create a bar chart to show the number of flights per distance group. This can help identify the distribution of flight distances in the dataset.
# Flights per DistanceGroup value, ordered by the group value rather than by count
distance_counts = df["DistanceGroup"].value_counts().sort_index()

distance_axis_labels = {"x": "Distance Group", "y": "Number of Flights"}
fig = px.bar(
    distance_counts,
    x=distance_counts.index,
    y=distance_counts.values,
    labels=distance_axis_labels,
)

# Title the x-axis explicitly
fig.update_xaxes(title="Distance Groups")
fig.show()
Create a heatmap to visualize the average departure delay by day of the week and airline. This can help identify patterns in delays for specific airlines on certain days.
import plotly.express as px
# Weekday code -> abbreviation, used to relabel the heatmap's x categories
day_mapping = dict(
    zip(range(1, 8), ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])
)

# One row per (weekday, airline) pair holding that pair's mean departure delay
heatmap_data = df.groupby(
    ["DayOfWeek", "Marketing_Airline_Network"], as_index=False
)["DepDelayMinutes"].mean()
heatmap_data["DayOfWeek"] = heatmap_data["DayOfWeek"].map(day_mapping)

heatmap_labels = {
    "DayOfWeek": "Day of Week",
    "Marketing_Airline_Network": "Airline",
    "DepDelayMinutes": "Average Delay (minutes)",
}

# NOTE(review): density_heatmap aggregates z per cell; the data is already
# reduced to one row per cell, so each cell should show that row's mean.
fig = px.density_heatmap(
    heatmap_data,
    x="DayOfWeek",
    y="Marketing_Airline_Network",
    z="DepDelayMinutes",
    nbinsx=7,
    color_continuous_scale=px.colors.diverging.Tealrose,
    labels=heatmap_labels,
)

# Figure title plus an explicit colorbar title
fig.update_layout(
    title="Average Delay by Day of Week and Airline",
    coloraxis_colorbar={"title": "Average Delay (minutes)"},
)
fig.show()
Create a line chart to show the daily flight volume over the period covered by the dataset. This can help identify trends in flight volume over time.
# FlightDate is stored as MM/DD/YYYY text (e.g. "12/19/2022"). Parse it into
# real timestamps before counting: sorting the raw strings is lexicographic,
# which matches chronological order only by coincidence (it breaks across
# years or with unpadded days), and text dates may be drawn as categories
# instead of on a date axis. `df` itself is left unmodified.
flight_dates = pd.to_datetime(df["FlightDate"], format="%m/%d/%Y")

# Flights per calendar day, in chronological order
daily_flight_volume = flight_dates.value_counts().sort_index()

fig = px.line(
    daily_flight_volume,
    x=daily_flight_volume.index,
    y=daily_flight_volume.values,
    labels={"x": "Date", "y": "Number of Flights"},
)

# Add a title to the x-axis
fig.update_xaxes(title="Flight Date")
fig.show()
Create a box plot to visualize the distribution of delays for each airline. This can help identify which airlines have the most variation in delay times.
# Box plot of DepDelayMinutes, one box per marketing airline
box_labels = {
    "Marketing_Airline_Network": "Airline",
    "DepDelayMinutes": "Delay (minutes)",
}
fig = px.box(
    df,
    x="Marketing_Airline_Network",
    y="DepDelayMinutes",
    labels=box_labels,
)
fig.show()
Create a scatter plot to show the relationship between flight distance and delay time. This can help identify whether longer flights tend to have more delays.
import plotly.express as px
# Scatter of Distance against DepDelayMinutes; the delay also drives the
# marker color so larger delays stand out.
scatter_labels = {
    "Distance": "Flight Distance (miles)",
    "DepDelayMinutes": "Delay (minutes)",
}
fig = px.scatter(
    df,
    x="Distance",
    y="DepDelayMinutes",
    color="DepDelayMinutes",
    color_continuous_scale=px.colors.sequential.Pinkyl,
    labels=scatter_labels,
)
fig.show()
# Flights per origin state as a two-column frame: State, NumFlights
state_counts = (
    df["OriginState"]
    .value_counts()
    .rename_axis("State")
    .reset_index(name="NumFlights")
)

# US choropleth; the State column is matched using the "USA-states" location mode
fig = px.choropleth(
    state_counts,
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color="NumFlights",
    color_continuous_scale=px.colors.sequential.Teal,
    labels={"NumFlights": "Number of Flights"},
    title="Number of Flights by State",
)

# Zero side/bottom margins; keep 30px on top for the title
tight_margins = {"r": 0, "t": 30, "l": 0, "b": 0}
fig.update_layout(margin=tight_margins)
fig.show()
# Mean DepDelayMinutes per origin state as a two-column frame: State, AvgDelay
state_delays = (
    df.groupby("OriginState")["DepDelayMinutes"]
    .mean()
    .rename_axis("State")
    .reset_index(name="AvgDelay")
)

# US choropleth; the State column is matched using the "USA-states" location mode
fig = px.choropleth(
    state_delays,
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color="AvgDelay",
    color_continuous_scale=px.colors.diverging.Tealrose,
    labels={"AvgDelay": "Average Delay (minutes)"},
    title="Average Delay Time by State",
)

# Zero side/bottom margins; keep 30px on top for the title
tight_margins = {"r": 0, "t": 30, "l": 0, "b": 0}
fig.update_layout(margin=tight_margins)
fig.show()
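# NOTE: disabled cell (kept for reference) - map of the top-N routes; it looks up city coordinates through geopy's Nominatim geocoder.
# import pandas as pd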
# import plotly.express as px
# from geopy.geocoders import Nominatim
# geolocator = Nominatim(user_agent="my_flight_analysis_app")
# def get_coordinates(city_name):
# location = geolocator.geocode(city_name)
# if location:
# return location.latitude, location.longitude
# else:
# return None, None
# route_counts = df.groupby(["OriginCityName", "DestCityName"]).size().reset_index(name="NumFlights")
# N = 100 # Change this value to display a different number of top routes
# top_routes = route_counts.nlargest(N, "NumFlights")
# top_routes["OriginLatitude"], top_routes["OriginLongitude"] = zip(*top_routes["OriginCityName"].apply(get_coordinates))
# top_routes["DestLatitude"], top_routes["DestLongitude"] = zip(*top_routes["DestCityName"].apply(get_coordinates))
# fig = px.scatter_geo(top_routes,
# lat="OriginLatitude",
# lon="OriginLongitude",
# hover_name="OriginCityName",
# size_max=5,
# projection="natural earth")
# for _, row in top_routes.iterrows():
# fig.add_trace(px.line_geo(lat=[row["OriginLatitude"], row["DestLatitude"]],
# lon=[row["OriginLongitude"], row["DestLongitude"]])
# .data[0])
# # Add city names to the bubbles
# for _, row in top_routes.iterrows():
# fig.add_trace(
# go.Scattergeo(
# lat=[row["OriginLatitude"]],
# lon=[row["OriginLongitude"]],
# text=[row["OriginCityName"]],
# mode="text",
# textfont=dict(size=10, color="black"),
# showlegend=False,
# textposition="bottom center"
# )
# )
# fig.show()
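# NOTE: disabled cell (kept for reference) - styled variant of the route map; it reuses `top_routes`, which is only built in the disabled cell above.
# import plotly.graph_objs as go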
# fig = go.Figure()
# # Customize map's appearance
# fig.update_geos(
# resolution=50,
# showcoastlines=True, coastlinecolor="Thistle",
# showland=True, landcolor="LightGreen",
# showocean=True, oceancolor="Azure",
# showlakes=True, lakecolor="LightBlue",
# showrivers=True, rivercolor="LightSteelBlue",
# showcountries=True, countrycolor="DarkOrange",
# showsubunits=True, subunitcolor="DarkOrange",
# projection_type="natural earth"
# )
# # Add flight routes
# for _, row in top_routes.iterrows():
# fig.add_trace(px.line_geo(lat=[row["OriginLatitude"], row["DestLatitude"]],
# lon=[row["OriginLongitude"], row["DestLongitude"]])
# .data[0])
# # Add city names to the bubbles
# for _, row in top_routes.iterrows():
# fig.add_trace(
# go.Scattergeo(
# lat=[row["OriginLatitude"]],
# lon=[row["OriginLongitude"]],
# text=[row["OriginCityName"]],
# mode="text",
# textfont=dict(size=10, color="black"),
# showlegend=False,
# textposition="bottom center"
# )
# )
# # Add city markers with hover text
# fig.add_trace(
# go.Scattergeo(
# lat=top_routes["OriginLatitude"],
# lon=top_routes["OriginLongitude"],
# hovertext=top_routes["OriginCityName"],
# mode="markers",
# marker=dict(size=6, color="red", symbol="circle", line=dict(width=1, color="black")),
# showlegend=False
# )
# )
# # Customize layout
# fig.update_layout(
# title="100 Top Flight Routes",
# title_x=0.5,
# geo=dict(
# scope="world",
# projection=dict(type="natural earth"),
# showland=True,
# landcolor="rgb(243, 243, 243)",
# countrycolor="rgb(204, 204, 204)",
# ),
# margin=dict(t=50, b=0, l=0, r=0)
# )
# fig.show()